This notebook presents a summary of the loaded GPX data.
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
import gpxpy
import gpxpy.gpx
from fiona.crs import from_epsg
from keplergl import KeplerGl
import geopandas as gpd
import movingpandas as mpd
import uuid
import hvplot.pandas
from pyproj import CRS
# Injected parameters
from dagster import seven as __dm_seven
import dagstermill as __dm_dagstermill
context = __dm_dagstermill._reconstitute_job_context(
**{
key: __dm_seven.json.loads(value)
for key, value
in {'executable_dict': '{"__class__": "ReconstructablePipeline", "asset_selection": {"__frozenset__": [{"__class__": "AssetKey", "path": ["MG91_20230428_artefacto", "raw_explore", "MG91_artefacto_reloj_20230428_01_explore"]}, {"__class__": "AssetKey", "path": ["MG91_20230428_artefacto", "raw_explore", "MG91_artefacto_reloj_20230428_02_explore"]}, {"__class__": "AssetKey", "path": ["MG91_20230428_artefacto_traj_db"]}, {"__class__": "AssetKey", "path": ["MG91_20230428_artefacto_traj_db_track"]}, {"__class__": "AssetKey", "path": ["MG91_artefacto_reloj_20230428_01"]}, {"__class__": "AssetKey", "path": ["MG91_artefacto_reloj_20230428_01_traj_smooth_db"]}, {"__class__": "AssetKey", "path": ["MG91_artefacto_reloj_20230428_02"]}, {"__class__": "AssetKey", "path": ["MG91_artefacto_reloj_20230428_02_traj_smooth_db"]}, {"__class__": "AssetKey", "path": ["workdir", "MG91", "MG91_20230428_artefacto", "gpx", "MG91_artefacto_reloj_20230428_01_gpx"]}, {"__class__": "AssetKey", "path": ["workdir", "MG91", "MG91_20230428_artefacto", "gpx", "MG91_artefacto_reloj_20230428_02_gpx"]}, {"__class__": "AssetKey", "path": ["workdir", "MG91_20230428_artefacto_traj"]}, {"__class__": "AssetKey", "path": ["workdir", "MG91_artefacto_reloj_20230428_01_traj"]}, {"__class__": "AssetKey", "path": ["workdir", "MG91_artefacto_reloj_20230428_01_traj_clean"]}, {"__class__": "AssetKey", "path": ["workdir", "MG91_artefacto_reloj_20230428_01_traj_smooth"]}, {"__class__": "AssetKey", "path": ["workdir", "MG91_artefacto_reloj_20230428_02_traj"]}, {"__class__": "AssetKey", "path": ["workdir", "MG91_artefacto_reloj_20230428_02_traj_clean"]}, {"__class__": "AssetKey", "path": ["workdir", "MG91_artefacto_reloj_20230428_02_traj_smooth"]}]}, "pipeline_name": "__ASSET_JOB", "repository": {"__class__": "ReconstructableRepository", "container_context": null, "container_image": null, "entry_point": ["dagster"], "executable_path": "/home/jag/anaconda3/envs/llacta-rumbos/bin/python3.10", "pointer": {"__class__": 
"ModuleCodePointer", "fn_name": "defs", "module": "tutorial_project", "working_directory": "/home/jag/dev/llactalab/rumbos/gps-duck/tutorial-project"}, "repository_load_data": null}, "solid_selection_str": null}', 'job_run_dict': '{"__class__": "PipelineRun", "asset_selection": {"__frozenset__": [{"__class__": "AssetKey", "path": ["MG91_20230428_artefacto", "raw_explore", "MG91_artefacto_reloj_20230428_01_explore"]}, {"__class__": "AssetKey", "path": ["MG91_20230428_artefacto", "raw_explore", "MG91_artefacto_reloj_20230428_02_explore"]}, {"__class__": "AssetKey", "path": ["MG91_20230428_artefacto_traj_db"]}, {"__class__": "AssetKey", "path": ["MG91_20230428_artefacto_traj_db_track"]}, {"__class__": "AssetKey", "path": ["MG91_artefacto_reloj_20230428_01"]}, {"__class__": "AssetKey", "path": ["MG91_artefacto_reloj_20230428_01_traj_smooth_db"]}, {"__class__": "AssetKey", "path": ["MG91_artefacto_reloj_20230428_02"]}, {"__class__": "AssetKey", "path": ["MG91_artefacto_reloj_20230428_02_traj_smooth_db"]}, {"__class__": "AssetKey", "path": ["workdir", "MG91", "MG91_20230428_artefacto", "gpx", "MG91_artefacto_reloj_20230428_01_gpx"]}, {"__class__": "AssetKey", "path": ["workdir", "MG91", "MG91_20230428_artefacto", "gpx", "MG91_artefacto_reloj_20230428_02_gpx"]}, {"__class__": "AssetKey", "path": ["workdir", "MG91_20230428_artefacto_traj"]}, {"__class__": "AssetKey", "path": ["workdir", "MG91_artefacto_reloj_20230428_01_traj"]}, {"__class__": "AssetKey", "path": ["workdir", "MG91_artefacto_reloj_20230428_01_traj_clean"]}, {"__class__": "AssetKey", "path": ["workdir", "MG91_artefacto_reloj_20230428_01_traj_smooth"]}, {"__class__": "AssetKey", "path": ["workdir", "MG91_artefacto_reloj_20230428_02_traj"]}, {"__class__": "AssetKey", "path": ["workdir", "MG91_artefacto_reloj_20230428_02_traj_clean"]}, {"__class__": "AssetKey", "path": ["workdir", "MG91_artefacto_reloj_20230428_02_traj_smooth"]}]}, "execution_plan_snapshot_id": "53d72acd7cbb9973aaedde948f0d1dcfe094cf59", 
"external_pipeline_origin": {"__class__": "ExternalPipelineOrigin", "external_repository_origin": {"__class__": "ExternalRepositoryOrigin", "repository_location_origin": {"__class__": "ManagedGrpcPythonEnvRepositoryLocationOrigin", "loadable_target_origin": {"__class__": "LoadableTargetOrigin", "attribute": null, "executable_path": null, "module_name": "tutorial_project", "package_name": null, "python_file": null, "working_directory": "/home/jag/dev/llactalab/rumbos/gps-duck/tutorial-project"}, "location_name": "tutorial_project"}, "repository_name": "__repository__"}, "pipeline_name": "__ASSET_JOB"}, "has_repository_load_data": false, "mode": null, "parent_run_id": null, "pipeline_code_origin": {"__class__": "PipelinePythonOrigin", "pipeline_name": "__ASSET_JOB", "repository_origin": {"__class__": "RepositoryPythonOrigin", "code_pointer": {"__class__": "ModuleCodePointer", "fn_name": "defs", "module": "tutorial_project", "working_directory": "/home/jag/dev/llactalab/rumbos/gps-duck/tutorial-project"}, "container_context": {}, "container_image": null, "entry_point": ["dagster"], "executable_path": "/home/jag/anaconda3/envs/llacta-rumbos/bin/python3.10"}}, "pipeline_name": "__ASSET_JOB", "pipeline_snapshot_id": "ad06cdf3396cfaebb77877426f5d0d38d6e149c3", "root_run_id": null, "run_config": {}, "run_id": "ed1a0d6a-ea17-477c-9eca-74e1f40d4d81", "solid_selection": null, "solids_to_execute": null, "status": {"__enum__": "PipelineRunStatus.STARTING"}, "step_keys_to_execute": null, "tags": {".dagster/grpc_info": "{\\"host\\": \\"localhost\\", \\"socket\\": \\"/tmp/tmpf5j209eu\\"}"}}', 'node_handle_kwargs': '{"name": "MG91_20230428_artefacto__raw_explore__MG91_artefacto_reloj_20230428_01_explore", "parent": null}', 'instance_ref_dict': '{"__class__": "InstanceRef", "compute_logs_data": {"__class__": "ConfigurableClassData", "class_name": "LocalComputeLogManager", "config_yaml": "base_dir: /home/jag/dev/llactalab/rumbos/gps-duck/tutorial-project/output/storage\\n", 
"module_name": "dagster.core.storage.local_compute_log_manager"}, "custom_instance_class_data": null, "event_storage_data": {"__class__": "ConfigurableClassData", "class_name": "SqliteEventLogStorage", "config_yaml": "base_dir: /home/jag/dev/llactalab/rumbos/gps-duck/tutorial-project/output/history/runs/\\n", "module_name": "dagster.core.storage.event_log"}, "local_artifact_storage_data": {"__class__": "ConfigurableClassData", "class_name": "LocalArtifactStorage", "config_yaml": "base_dir: /home/jag/dev/llactalab/rumbos/gps-duck/tutorial-project/output\\n", "module_name": "dagster.core.storage.root"}, "run_coordinator_data": {"__class__": "ConfigurableClassData", "class_name": "DefaultRunCoordinator", "config_yaml": "{}\\n", "module_name": "dagster.core.run_coordinator"}, "run_launcher_data": {"__class__": "ConfigurableClassData", "class_name": "DefaultRunLauncher", "config_yaml": "{}\\n", "module_name": "dagster"}, "run_storage_data": {"__class__": "ConfigurableClassData", "class_name": "SqliteRunStorage", "config_yaml": "base_dir: /home/jag/dev/llactalab/rumbos/gps-duck/tutorial-project/output/history/\\n", "module_name": "dagster.core.storage.runs"}, "schedule_storage_data": {"__class__": "ConfigurableClassData", "class_name": "SqliteScheduleStorage", "config_yaml": "base_dir: /home/jag/dev/llactalab/rumbos/gps-duck/tutorial-project/output/schedules\\n", "module_name": "dagster.core.storage.schedules"}, "scheduler_data": {"__class__": "ConfigurableClassData", "class_name": "DagsterDaemonScheduler", "config_yaml": "{}\\n", "module_name": "dagster.core.scheduler"}, "secrets_loader_data": null, "settings": {}, "storage_data": {"__class__": "ConfigurableClassData", "class_name": "DagsterSqliteStorage", "config_yaml": "base_dir: /home/jag/dev/llactalab/rumbos/gps-duck/tutorial-project/output\\n", "module_name": "dagster.core.storage.sqlite_storage"}}', 'step_key': '"MG91_20230428_artefacto__raw_explore__MG91_artefacto_reloj_20230428_01_explore"', 'output_log_path': 
'"/tmp/tmpp1tuyhss"', 'marshal_dir': '"/tmp/dagstermill/ed1a0d6a-ea17-477c-9eca-74e1f40d4d81/marshal"', 'run_config': '{}'}.items()
}
)
# Fetch the notebook input named 'data' through dagstermill's (private) input-parameter loader.
data = __dm_dagstermill._load_input_parameter('data')
code asset_name \
0 MG91 MG91_artefacto_reloj_20230428_01
1 MG91 MG91_artefacto_reloj_20230428_02
2 MG91 MG91_persona_reloj_20230428_01
3 MG91 MG91_persona_reloj_20230428_02
4 MG91 MG91_artefacto_reloj_20230503_01
5 MG91 MG91_artefacto_reloj_20230503_02
6 MG91 MG91_persona_reloj_20230503_01
7 MG91 MG91_persona_reloj_20230503_02
file_name folder_name owner \
0 MG91_artefacto_reloj_20230428_01.gpx MG91 Emilia Acurio
1 MG91_artefacto_reloj_20230428_02.gpx MG91 Emilia Acurio
2 MG91_persona_reloj_20230428_01.gpx MG91 Emilia Acurio
3 MG91_persona_reloj_20220428_02.gpx MG91 Emilia Acurio
4 MG91_artefacto_reloj_20230503_01.gpx MG91 Emilia Acurio
5 MG91_artefacto_reloj_20230503_02.gpx MG91 Emilia Acurio
6 MG91_persona_reloj_20230503_01.gpx MG91 Emilia Acurio
7 MG91_persona_reloj_20230503_02.gpx MG91 Emilia Acurio
group date type
0 MG91_20230428_artefacto 20230428 artefacto
1 MG91_20230428_artefacto 20230428 artefacto
2 MG91_20230428_persona 20230428 persona
3 MG91_20230428_persona 20230428 persona
4 MG91_20230503_artefacto 20230503 artefacto
5 MG91_20230503_artefacto 20230503 artefacto
6 MG91_20230503_persona 20230503 persona
7 MG91_20230503_persona 20230503 persona
RESULT AssetsDefinition with key ["workdir", "MG91_20230428_artefacto_traj"]
RESULT AssetsDefinition with key ["workdir", "MG91_20230428_persona_traj"]
RESULT AssetsDefinition with key ["workdir", "MG91_20230503_artefacto_traj"]
RESULT AssetsDefinition with key ["workdir", "MG91_20230503_persona_traj"]
Hello job
/home/jag/anaconda3/envs/llacta-rumbos/lib/python3.10/site-packages/dagster/_core/definitions/resolved_asset_deps.py:22: ExperimentalWarning: Asset ["workdir", "MG91_persona_reloj_20230503_02_traj_clean"]'s dependency 'MG91_persona_reloj_20230503_02_traj' was resolved to upstream asset ["workdir", "MG91_persona_reloj_20230503_02_traj"], because the name matches and they're in the same group. This is experimental functionality that may change in a future release. To mute warnings for experimental functionality, invoke warnings.filterwarnings("ignore", category=dagster.ExperimentalWarning) or use one of the other methods described at https://docs.python.org/3/library/warnings.html#describing-warning-filters.
self._deps_by_assets_def_id = resolve_assets_def_deps(assets_defs, source_assets)
/home/jag/anaconda3/envs/llacta-rumbos/lib/python3.10/site-packages/dagster/_core/definitions/resolved_asset_deps.py:22: ExperimentalWarning: Asset ["workdir", "MG91_artefacto_reloj_20230428_01_traj_clean"]'s dependency 'MG91_artefacto_reloj_20230428_01_traj' was resolved to upstream asset ["workdir", "MG91_artefacto_reloj_20230428_01_traj"], because the name matches and they're in the same group. This is experimental functionality that may change in a future release. To mute warnings for experimental functionality, invoke warnings.filterwarnings("ignore", category=dagster.ExperimentalWarning) or use one of the other methods described at https://docs.python.org/3/library/warnings.html#describing-warning-filters.
self._deps_by_assets_def_id = resolve_assets_def_deps(assets_defs, source_assets)
2023-06-20 18:51:00 -0500 - dagster - DEBUG - __ASSET_JOB - ed1a0d6a-ea17-477c-9eca-74e1f40d4d81 - 2214827 - RESOURCE_INIT_STARTED - Starting initialization of resources [io_manager, mobilityDb_manager, output_notebook_io_manager].
2023-06-20 18:51:00 -0500 - dagster - DEBUG - __ASSET_JOB - ed1a0d6a-ea17-477c-9eca-74e1f40d4d81 - 2214827 - RESOURCE_INIT_SUCCESS - Finished initialization of resources [io_manager, mobilityDb_manager, output_notebook_io_manager].
# Report how many records were loaded, then preview the first rows.
n_records = len(data)
print(f"This dataset contains {n_records} records.\nThe first lines are:")
data.head()
This dataset contains 8000 records. The first lines are:
| lat | lon | elevation | time | file_path | fila_name | track_id | codigo | id | geometry | |
|---|---|---|---|---|---|---|---|---|---|---|
| 0 | -2.895220 | -78.986263 | None | 2023-04-28 07:30:57+00:00 | data/MG91/MG91_artefacto_reloj_20230428_01.gpx | MG91_artefacto_reloj_20230428_01.gpx | MG91_artefacto_reloj_20230428_01 | MG91 | 1 | POINT (-78.98626 -2.89522) |
| 1 | -2.895220 | -78.986260 | None | 2023-04-28 07:30:58+00:00 | data/MG91/MG91_artefacto_reloj_20230428_01.gpx | MG91_artefacto_reloj_20230428_01.gpx | MG91_artefacto_reloj_20230428_01 | MG91 | 1 | POINT (-78.98626 -2.89522) |
| 2 | -2.895220 | -78.986260 | None | 2023-04-28 07:30:59+00:00 | data/MG91/MG91_artefacto_reloj_20230428_01.gpx | MG91_artefacto_reloj_20230428_01.gpx | MG91_artefacto_reloj_20230428_01 | MG91 | 1 | POINT (-78.98626 -2.89522) |
| 3 | -2.895220 | -78.986258 | None | 2023-04-28 07:31:00+00:00 | data/MG91/MG91_artefacto_reloj_20230428_01.gpx | MG91_artefacto_reloj_20230428_01.gpx | MG91_artefacto_reloj_20230428_01 | MG91 | 1 | POINT (-78.98626 -2.89522) |
| 4 | -2.895217 | -78.986260 | None | 2023-04-28 07:31:01+00:00 | data/MG91/MG91_artefacto_reloj_20230428_01.gpx | MG91_artefacto_reloj_20230428_01.gpx | MG91_artefacto_reloj_20230428_01 | MG91 | 1 | POINT (-78.98626 -2.89522) |
# List the column labels of the loaded dataset.
data.columns
Index(['lat', 'lon', 'elevation', 'time', 'file_path', 'fila_name', 'track_id',
'codigo', 'id', 'geometry'],
dtype='object')
# Show the dtype stored in each column.
data.dtypes
lat float64 lon float64 elevation object time datetime64[ns, UTC] file_path object fila_name object track_id object codigo object id int64 geometry geometry dtype: object
# Show the coordinate reference system attached to the dataset.
data.crs
<Geographic 2D CRS: EPSG:4326> Name: WGS 84 Axis Info [ellipsoidal]: - Lat[north]: Geodetic latitude (degree) - Lon[east]: Geodetic longitude (degree) Area of Use: - name: World. - bounds: (-180.0, -90.0, 180.0, 90.0) Datum: World Geodetic System 1984 ensemble - Ellipsoid: WGS 84 - Prime Meridian: Greenwich
# Interactive map of all records drawn over OSM tiles.
data.hvplot(
    geo=True,
    tiles='OSM',
    title='Geographic extent of the dataset',
    frame_height=450,
)
# Estimate the ground area covered by the recorded points.
# Reproject to EPSG:32717 (WGS 84 / UTM zone 17S) so buffer distances and
# areas are expressed in metres / square metres instead of degrees.
code = data['codigo'].unique()[0]
data_utm = data.to_crs(CRS(32717))
# Give every point a 5 m radius footprint; dissolve() merges the overlapping
# footprints per 'codigo' so shared ground is not counted twice.
data_utm['geometry'] = data_utm['geometry'].buffer(5)
total_area = data_utm.dissolve(by='codigo').area
# .area is in square metres here, and 1 km2 = 1,000,000 m2.
# (Dividing by 1000, as before, overstated the result by a factor of 1000.)
total_area = total_area[code] / 1_000_000
print('The total area covered by the data is: {:,.2f} km2'.format(total_area))
The total area covered by the data is: 63.77 km2
# Report the earliest and latest timestamps present in the dataset.
first_seen = data.time.min()
last_seen = data.time.max()
print(f"The dataset covers the time between {first_seen} and {last_seen}.")
The dataset covers the time between 2023-04-28 07:30:57+00:00 and 2023-04-28 09:44:16+00:00.
# Elapsed time between the first and the last record.
elapsed = data.time.max() - data.time.min()
print(f"That's {elapsed}")
That's 0 days 02:13:19
Each complete 10-minute interval should contain 600 records (one per second); the first and last intervals may contain fewer.
# Bar chart of the number of records that fall in each 10-minute window.
# The count (not the sum) of 'id' is what must be compared against the
# expected number of records per window: summing only matches the count
# when every id happens to be 1.
fig, axs = plt.subplots(figsize=(12, 4))
records_per_window = data.groupby(pd.Grouper(key='time', freq='10min'))['id'].count()
records_per_window.plot(kind='bar', ax=axs)
<Axes: xlabel='time'>
Because the GPS trackers record at a resolution of 1 s, we expect consecutive records to be exactly 1 second apart.
# Seconds elapsed between each record and the one before it (in stored row order),
# attached as a 'delta_t' column and shown as a histogram over 0-60 s.
timestamps = data.reset_index().time
gap_seconds = timestamps.diff().dt.total_seconds()
df = data.assign(delta_t=gap_seconds.values)
pd.DataFrame(df).hvplot.hist(
    'delta_t',
    title='Histogram of intervals between consecutive records (in seconds)',
    bins=60,
    bin_range=(0, 60),
)
For example: Does the data contain unattainable speeds?
# Wrap the records in a movingpandas trajectory, add a speed column,
# and report the largest speed value in both m/s and km/h.
traj = mpd.Trajectory(data, traj_id='id', t='time')
traj.add_speed()
max_speed = traj.df.speed.max()
max_speed_kmh = max_speed*3600/1000
print(f"The highest computed speed is {max_speed:,.2f} m/s ({max_speed_kmh:,.2f} km/h)")
/home/jag/anaconda3/envs/llacta-rumbos/lib/python3.10/site-packages/movingpandas/trajectory.py:136: TimeZoneWarning: Time zone information dropped from trajectory. All dates and times will use local time. To use UTC or a different time zone, convert and drop time zone information prior to trajectory creation. warnings.warn(
The highest computed speed is 65.16 m/s (234.59 km/h)
# Distribution of the speed column of the trajectory, in 100 bins.
pd.DataFrame(traj.df).hvplot.hist(
    'speed',
    title='Histogram of speeds (in meters per second)',
    bins=100,
)
# Order the records from fastest to slowest and list the 20 highest speeds,
# converted with the same m/s -> km/h factor (x 3600 / 1000) used in the print above.
speed = pd.DataFrame(traj.df).sort_values('speed', ascending=False)
speed_km_h = speed['speed'].mul(3600).div(1000)
speed_km_h.head(20)
time 2023-04-28 09:11:04 234.592493 2023-04-28 09:41:25 125.858600 2023-04-28 09:44:05 101.109475 2023-04-28 09:41:21 93.930480 2023-04-28 09:11:05 86.082492 2023-04-28 08:53:59 82.932706 2023-04-28 08:56:20 82.547829 2023-04-28 07:32:28 51.294701 2023-04-28 09:42:33 48.187384 2023-04-28 09:41:53 47.366488 2023-04-28 09:42:31 47.171383 2023-04-28 09:11:13 44.299663 2023-04-28 09:41:26 39.884652 2023-04-28 07:34:17 39.633788 2023-04-28 09:42:32 35.654988 2023-04-28 08:56:21 35.033008 2023-04-28 08:53:13 34.940074 2023-04-28 07:34:14 34.867773 2023-04-28 09:41:52 31.343546 2023-04-28 08:53:14 30.601808 Name: speed, dtype: float64
# Re-import dagstermill under its private alias and call its _teardown() hook.
import dagstermill as __dm_dagstermill
__dm_dagstermill._teardown()